#This to obtain:
#Figure_4a
#Figure_4b
#Figure_4c
#Figure_A9a
#Figure_A9b
#Figure_A9c
#Figure_A10a
#Figure_A10b
#Figure_A10c

library("ggplot2")
library("glue")
library('gridExtra')
library('grid')
library('lfe')
library("readxl")
library("sf")
library("dplyr")

setwd("/Users/bgpopescu/")
#setwd("C:/Users/bogdanp/")

#Reading Polygons
x<-st_layers(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb")


HRV_adm0 <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                    layer="HRV_adm0_wgs")

HRV_adm3 <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                    layer="HRV_adm3")
HRV_adm3<-st_simplify(HRV_adm3, dTolerance = 0.005)
HRV_adm3<-subset(HRV_adm3, select = c(ID_2))
HRV_adm3_data = read_excel("./Dropbox/Legacies_Project/Analysis/data/merge.xlsx", sheet=1, col_names = TRUE, skip = 0)

final_sp<-left_join(HRV_adm3, HRV_adm3_data, by = c("ID_2"="ID_2"))
data<-subset(final_sp, bosnia_border_mun==0)
data$dist1 <- as.numeric( with (data,ifelse(data$treat==1, 1, -1)))
data$dist2 <-data$dist1*data$krajna6_distance


####################
# Helper functions #
####################
#Function to create individual graphs
comp_dist <- function(data_sample, y, x, spec, title_spec) {
  # Takes:
  #   - data_sample, a dataframe with the relevant data
  #   - y, a string with my dependent variable
  #   - x, a list of strings with my independent variables
  #   - spec, model specification
  #   - title_spec, label for the model specification (to appear on
  #                 the title of the graph)
  # Returns:
  #   Nothing, creates one picture in the folder. This function
  #   compares bandwidths in increments of 1 Km for the outcomes
  #   of interest (y) and the specified model. 
  
  # Define range of potential bandwidths
  sequence = seq(from = 10, to = 60, by = 1)
  
  #Create empty variables
  confint1 = rep(NA, length(sequence))
  confint2 = rep(NA, length(sequence))
  confint3 = rep(NA, length(sequence))
  confint4 = rep(NA, length(sequence))
  coef1 = rep(NA, length(sequence))
  
  
  # This loop calculates local linear estimates
  # for bandwidths from 15 to 150 kilometers
  specification = as.formula(paste(glue("{y}~"), 
                                   paste(x, collapse = "+")))
  

  for (i in 1 : length(sequence)){
    band = data_sample[which(data_sample$dist2 > -sequence[i] & 
                               data_sample$dist2 < sequence[i]),]
    model1 <- felm(specification, paste(glue("|0|0|NAME_1")), data = band)
    confint1[i] = confint(model1)[2,1]
    confint2[i] = confint(model1)[2,2]
    confint3[i] = confint(model1, level = 0.90)[2,1]
    confint4[i] = confint(model1, level = 0.90)[2,2]  
    coef1[i] = coef(model1)[2]
  }
  
  df <- data.frame(sequence = sequence, coef1 = coef1, 
                   confint1 = confint1, confint2 = confint2,
                   confint3 = confint3, confint4 = confint4)
  
  p = 0.01
  #p = 2*sd(df$confint2, df$confint1)
  upper_mgn = max(p, 5*sd(df$confint2))
  lower_mgn = max(p, 5*sd(df$confint1))
  y_max<-max(upper_mgn, max(df$confint2) + upper_mgn, 0)
  y_min<-min(lower_mgn, min(df$confint1) - lower_mgn, 0)
  
  mean_feature <- ggplot(data = df, aes(sequence, coef1)) +
    geom_point(size = 1.3) +
    geom_errorbar(aes(ymin = confint1, ymax = confint2), size = 0.2, width = 0)+
    geom_errorbar(aes(ymin = confint3, ymax = confint4), size = 0.5, width = 0)+
    geom_hline(yintercept = 0) +
    theme_bw() +
    scale_x_continuous(name = "Bandwidth (km)", breaks=seq(10,60,10)) +
    #print(max((df$confint4)+min(df$confint4))
    
    scale_y_continuous(name = "Estimate", limits = c(y_min, 
                                                     y_max)) +
    ggtitle(glue("{title_spec}")) +
    theme(plot.title = element_text(hjust = 0.5),
          axis.text.x = element_text(size=14),
          axis.text.y = element_text(size=14),
          axis.title=element_text(size=14))+
    annotate(geom = "text", -Inf, Inf, label = c(paste(" Civilian Territory N =", nrow(data_sample[data_sample$treat==0,]),"\n", 
                                                       "Military Territory N =", nrow(data_sample[data_sample$treat==1,]))), hjust = 0, vjust = 1)

  return(mean_feature)
}


#Function to create group graphs
create_graphs_per_specifications_1 <- function(regressors, specifications, 
                                             specification_label, data_sample, data_sample_name){
  # Takes:
  #   - regressors, a list of dependent variables
  #   - specifications, lists of model specifications (indep variables)
  #   - specification_label, a list of labels for model specifications
  #   - data_sample, the dataset of interest
  #   - data_sample_name, label for the data_sample
  
  # Returns:
  #   Creates one grid of pictures in the folder. This function
  #   allows us to vary the regressors.
  
  for (y in regressors) {
    regressor_name  = y[[1]]
    regressor_label = y[[2]]
    figure_name = y[[3]]
    
    print(glue("Using dependent variable {regressor_label}.."))
    graphs <- list()
    for (spec in names(specifications)){
      print(spec)
      print(specification_label[[spec]])
      temp_graph <- comp_dist(data_sample, regressor_name, 
                              specifications[[spec]], spec, 
                              specification_label[[spec]])
      graphs[[paste(c(spec, regressor_name), collapse = '_')]] <- temp_graph
    }
    
    #Ncol may need to be changed
    grid <- grid.arrange(grobs = graphs, nrow = 2, ncol = 1,
                         top = textGrob({},
                                        gp = gpar(fontsize = 20, font = 2)))
    ggsave(grid, file = glue("Figure{figure_name}{data_sample_name}.jpg"), 
           height = 10, width = 10, 
           units = "cm", dpi = 250)
  }
  
}


########
#MODELS#
########


# Define model specifications
specifications <- list(c('treat'),
                       c('treat', 'POINT_X', 'POINT_Y', 'zagreb_distance', 'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7', 'quad1', 'quad2', 'quad3'))
                       

names(specifications) <- c('mean', "meanbfedist")

specification_label<-c("Treat", 
                       "Treat + Lat-Lon + Dist. Zagreb + BFE")

names(specification_label)<-c('mean', "meanbfedist")

regressors<-list(c("pct_no_water", "Pct. Dwellings No Water, 2011", "_4"))

sample_1 <- data
sample_2 <- data[ which(data$krajna6_river_NEAR_FID==6), ]
sample_3 <- data[ which(data$krajna6_river_NEAR_FID==8), ]


#shortcut = glue("./Dropbox/Legacies_Project/Paper/figures/")
#setwd(shortcut)

create_graphs_per_specifications_1(regressors, specifications, 
                                   specification_label, sample_1, 'a')
create_graphs_per_specifications_1(regressors, specifications, 
                                   specification_label, sample_2, 'b')
create_graphs_per_specifications_1(regressors, specifications, 
                                 specification_label, sample_3, 'c')

##############
#Long Version#
##############

####################
# Helper functions #
####################
#Function to create individual graphs

create_graphs_per_specifications_2 <- function(regressors, specifications, 
                                               specification_label, data_sample, data_sample_name){
  # Takes:
  #   - regressors, a list of dependent variables
  #   - specifications, lists of model specifications (indep variables)
  #   - specification_label, a list of labels for model specifications
  #   - data_sample, the dataset of interest
  #   - data_sample_name, label for the data_sample
  
  # Returns:
  #   Creates one grid of pictures in the folder. This function
  #   allows us to vary the regressors.
  
  for (y in regressors) {
    regressor_name  = y[[1]]
    regressor_label = y[[2]]
    figure_name = y[[3]]
    
    print(glue("Using dependent variable {regressor_label}.."))
    graphs <- list()
    for (spec in names(specifications)){
      print(spec)
      print(specification_label[[spec]])
      temp_graph <- comp_dist(data_sample, regressor_name, 
                              specifications[[spec]], spec, 
                              specification_label[[spec]])
      graphs[[paste(c(spec, regressor_name), collapse = '_')]] <- temp_graph
    }
    
    #Ncol may need to be changed
    grid <- grid.arrange(grobs = graphs, nrow = 6, ncol = 1,
                         top = textGrob({},
                                        gp = gpar(fontsize = 20, font = 2)))
    ggsave(grid, file = glue("Figure{figure_name}{data_sample_name}.jpg"), 
           height = 40, width = 10, 
           units = "cm", dpi = 300)
  }
  
}


# Define model specifications
specifications <- list(c('treat'),
                       c('treat', 'POINT_X', 'POINT_Y', 'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7'),
                       c('treat', 'krajna6_distance', 'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7'),
                       c('treat', 'POINT_X', 'POINT_Y', 'krajna6_distance', 'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7', 'quad1', 'quad2', 'quad3'),
                       c('treat', 'krajna6_distance*krajna6_distance', 'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7'),
                       c('treat', 'POINT_X', 'POINT_Y', 'krajna6_distance*krajna6_distance', 'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7'))


names(specifications) <- c('mean', "mean_latlon_bfe", "mean_dist_bfe", 
                           "mean_latlon_dist_bfe", "mean_quaddist_bfe",
                           "mean_latlon_quaddist_bfe")

specification_label<-c("Treat", 
                       "Treat + Lat-Lon + BFE",
                       "Treat + Dist. Brd. + BFE",
                       "Treat + Lat-Lon + Dist. Brd. + BFE",
                       "Treat + Qud. Dist. Brd. + BFE",
                       "Treat + Lat-Lon + Quad. Dist. Brd. + BFE")

names(specification_label)<-names(specifications)


regressors<-list(c("pct_no_water", "Pct. Dwellings No Water, 2011", "A9"))

sample_1 <- data
sample_2 <- data[ which(data$krajna6_river_NEAR_FID==6), ]
sample_3 <- data[ which(data$krajna6_river_NEAR_FID==8), ]


shortcut = glue("./Dropbox/Legacies_Project/Paper/figures/")
setwd(shortcut)

create_graphs_per_specifications_2(regressors, specifications, 
                                   specification_label, sample_1, '_a')
create_graphs_per_specifications_2(regressors, specifications, 
                                   specification_label, sample_2, '_b')
create_graphs_per_specifications_2(regressors, specifications, 
                                   specification_label, sample_3, '_c')

#Intermediary Outcomes

regressors<-list(c("pct_military_1857", "Pct. Military, 1857", "A10_a"),
                 c("pop_zadrugas", "Pop. in Zadruga, 1895", "A10_b"),
                 c("railroads_1869_density", "Density Railroads, 1869", "A10_c"))


create_graphs_per_specifications_2(regressors, specifications, 
                                   specification_label, sample_1, "")
